/*
 * ForeachWord.h
 *
 * Implements a collection of iterators for running over the multiple
 * parses of a sentence, the multiple word-instances of a parse, and so
 * on. The goal here is that these iterators hide the structural detail
 * of the opencog representation of sentences, parses, and so on. Thus,
 * if (when?) the opencog representation changes, then only this file
 * needs to be adjusted, instead of the broad sweep of algorithms.
 *
 * Copyright (c) 2008 Linas Vepstas <linas@linas.org>
 */

#ifndef _OPENCOG_FOREACH_WORD_H
#define _OPENCOG_FOREACH_WORD_H

#include <opencog/atoms/base/Atom.h>
#include <opencog/atomutils/ForeachChaseLink.h>
#include <opencog/atomutils/FollowLink.h>
#include <opencog/atoms/base/Link.h>
#include <opencog/atoms/base/Node.h>
#include <opencog/atoms/base/atom_types.h>
#include <opencog/nlp/types/atom_types.h>

namespace opencog {

/**
 * Call the callback for each parse in a sentence.  The argument handle
 * is presumed to identify a SentenceNode, which is linked to parses
 * via a ParseLink:
 *
 *    ParseLink
 *        ParseNode "parse@2" strength=0.8 confidence=0.5
 *        SentenceNode "sentence@22"
 */
template<class T>
inline bool foreach_parse(const Handle& h, bool (T::*cb)(const Handle&), T *data)
{
	// The sentence `h` is walked via its PARSE_LINKs using the reverse
	// binary-link iterator; `cb` is invoked on `data` for what it finds.
	// The iterator's result is handed back to the caller unchanged.
	const bool result = foreach_reverse_binary_link(h, PARSE_LINK, cb, data);
	return result;
}

/**
 * foreach_word_instance --
 * Call the callback for every word-instance in a parse. The argument
 * handle is presumed to identify a specific parse. The word-instances
 * are listed in sentence order, via a ReferenceLink:
 *
 *    ReferenceLink
 *       ParseNode "parse@3"
 *       ListLink
 *          WordInstanceNode "Jim@169"
 *          WordInstanceNode "stopped@456"
 */
template <class T>
inline bool foreach_word_instance(const Handle& ha, bool (T::*cb)(const Handle&), T *data)
{
	// Hop from the parse `ha` across its ReferenceLink to whatever
	// atom sits on the far side (expected: the ListLink of words).
	FollowLink fl;
	const Handle h = fl.follow_binary_link(ha, REFERENCE_LINK);

	// If the hop found nothing, or found something that is not a link,
	// the cast yields a null pointer. In that case there is nothing to
	// iterate over: return false without invoking the callback, rather
	// than dereferencing null.
	const auto word_list = LinkCast(h);
	if (nullptr == word_list) return false;

	// Invoke `cb` on `data` for each member of the list; the result of
	// the iteration is returned unchanged.
	return word_list->foreach_outgoing(cb, data);
}

/**
 * foreach_word_sense_of_inst --
 * Call the callback for each word-sense associated with a word-instance
 * The argument handle is presumed to identify a word instance, which is
 * linked to word senses via an InheritanceLink:
 *
 *   InheritanceLink strength=0.9 confidence=0.1
 *      WordInstanceNode "bark@144"
 *      WordSenseNode "bark%1:20:00::"
 *
 * The above link struct is generated by MihalceaLabel.cc, and any
 * changes there should be mirrored here.
 *
 * The callback has two handle arguments: the first argument will
 * contain the handle of the word-sense; the second arg will contain the
 * handle of the link making up the pair.
 */
// Helper for foreach_word_sense_of_inst(): holds the user's callback
// and forwards to it only those atoms that are WordSenseNodes.
// This class is not for general, public use!
template<typename T>
class PrivateUseOnlyEachSense
{
	public:
		// Member-function callback supplied by the user, taking the
		// word-sense handle and the handle of the connecting link.
		bool (T::*user_cb)(const Handle&, const Handle&);

		// Object on which user_cb is invoked.
		T *user_data;

		// Forward (h, l) to the user callback when `h` is a
		// WordSenseNode; otherwise return false without calling it.
		bool sense_filter(const Handle& h, const Handle& l)
		{
			const bool is_word_sense = (WORD_SENSE_NODE == h->get_type());
			return is_word_sense ? (user_data->*user_cb)(h, l) : false;
		}
};

template<typename T>
inline bool foreach_word_sense_of_inst(const Handle& h,
                    bool (T::*cb)(const Handle&, const Handle&), T *data)
{
	PrivateUseOnlyEachSense<T> es;
	es.user_cb = cb;
	es.user_data = data;
	return foreach_binary_link(h, INHERITANCE_LINK, &PrivateUseOnlyEachSense<T>::sense_filter, &es);
}

/**
 * Given a dictionary word, call the callback for each word sense
 * associated with that dictionary word, for all parts-of-speech.
 * The argument is presumed to point at a specific dictionary word.
 *
 * Each dictionary-word is assumed to be linked to word senses via
 *
 *    WordSenseLink
 *       WordNode "bark"
 *       WordSenseNode "bark%1:20:00::"
 */
template <class T>
inline bool foreach_dict_word_sense(const Handle& h,
                     bool (T::*cb)(const Handle&), T *data)
{
	// Walk the WordSenseLinks of the dictionary word `h`, invoking
	// `cb` on `data`; the iterator's result is returned unchanged.
	const bool result = foreach_binary_link(h, WORD_SENSE_LINK, cb, data);
	return result;
}

/**
 * foreach_dict_word_sense_pos --
 * Given a dictionary word, call the callback for each word sense
 * associated with that dictionary word, for the indicated parts-of-speech.
 * The argument is presumed to point at a specific dictionary word.
 *
 * Each dictionary-word is assumed to be linked to word senses via
 *
 *    WordSenseLink
 *       WordNode "bark"
 *       WordSenseNode "bark%1:20:00::"
 *
 * Each word-sense is assumed to be linked to a part-of-speech via
 *
 *    PartOfSpeechLink
 *       WordSenseNode "bark%1:20:00::"
 *       DefinedLinguisticConceptNode "noun"
 *
 */
// The PrivateUseOnlyPOSFilter class should be local scope to
// foreach_dict_word_sense_pos() only, but C++ doesn't allow this. :-(
// This class is not for general, public use!
template <typename T>
class PrivateUseOnlyPOSFilter
{
	public:
		bool (T::*user_cb)(const Handle&);
		T *user_data;
		const std::string *desired_pos;
		bool pos_filter(const Handle& word_sense)
		{
			// Find the part-of-speech for this word-sense.
			FollowLink fl;
			Handle h(fl.follow_binary_link(word_sense, PART_OF_SPEECH_LINK));

			// The 'no-sense' special-case sense will not have a pos.
			const std::string &sense_pos = h->get_name();

			// If there's no POS match, skip this sense.
			if (desired_pos->compare(sense_pos)) return false;

			// If we are here, there's a match, so call the user callback.
			return (user_data->*user_cb)(h);
		}
};

template <typename T>
inline bool foreach_dict_word_sense_pos(const Handle& h, const std::string &pos,
                                        bool (T::*cb)(const Handle&), T *data)
{
	PrivateUseOnlyPOSFilter<T> pf;
	pf.user_cb = cb;
	pf.user_data = data;
	pf.desired_pos = &pos;
	return foreach_binary_link(h, WORD_SENSE_LINK,
	                         &PrivateUseOnlyPOSFilter<T>::pos_filter, &pf);
}

/**
 * get_part_of_speech - return part of speech for indicated atom.
 * @handle:  handle of a concept (word-instance or word-sense node).
 *
 * Return the part-of-speech for the indicated concept (word-instance
 * or word-sense node). This is just a predicate, looking for a link
 * type of "PartOfSpeech", and returning the second element of that
 * link, (which is assumed to be a node).
 *
 * Each word-instance is assumed to be linked to a part-of-speech via
 *
 *    PartOfSpeechLink
 *       WordInstanceNode "bark@169"
 *       DefinedLinguisticConceptNode "noun"
 *
 * Word-sense nodes are similarly linked:
 *
 *    PartOfSpeechLink
 *       WordSenseNode "bark%1:20:00::"
 *       DefinedLinguisticConceptNode "noun"
 */
inline const std::string get_part_of_speech(const Handle& word_instance)
{
	// Find the part-of-speech for this word instance.
	FollowLink fl;
	const Handle inst_pos(fl.follow_binary_link(word_instance, PART_OF_SPEECH_LINK));

	// A name can only be read from a node. Return the empty string
	// when no part-of-speech atom was found, or when what was found
	// is not a node.
	if (inst_pos == nullptr or not inst_pos->is_node()) return std::string();

	return inst_pos->get_name();
}

/**
 * Return the dictionary-word corresponding to a given word-instance.
 *
 * Each word-instance is assumed to be linked to a single WordNode via
 * a ReferenceLink:
 *
 *    ReferenceLink
 *      WordInstanceNode "bark@169"
 *      WordNode "bark"
 */
inline Handle get_dict_word_of_word_instance(const Handle& word_instance)
{
	// Hop across the ReferenceLink of the word instance and hand back
	// whatever handle the follower produces.
	return FollowLink().follow_binary_link(word_instance, REFERENCE_LINK);
}

/**
 * Return the dictionary-word, in lemma form, corresponding to a given word-instance.
 *
 * Each word-instance is assumed to be linked to a single WordNode via
 * a LemmaLink:
 *
 *    LemmaLink
 *      WordInstanceNode "was@169"
 *      WordNode "is"
 */
inline Handle get_lemma_of_word_instance(const Handle& word_instance)
{
	// Hop across the LemmaLink of the word instance and hand back
	// whatever handle the follower produces.
	return FollowLink().follow_binary_link(word_instance, LEMMA_LINK);
}

/**
 * Follow sense edges.
 * It is assumed that the incoming handle is a (inst,sense) pair.
 * The callback is invoked for each edge, passing the far pair
 * as the first argument, and the edge itself as the second argument.
 */
template <typename T>
inline bool
foreach_sense_edge(const Handle& h,
                   bool (T::*cb)(const Handle&, const Handle&), T *data)
{
	// Walk the CosenseLinks attached to `h` using the unordered
	// binary-link iterator; its result is returned unchanged.
	const bool result = foreach_unordered_binary_link(h, COSENSE_LINK, cb, data);
	return result;
}


/**
 * For each word-instance, loop over all syntactic relationships
 * (i.e. _subj, _obj, _nn, _amod, and so on). For each relationship,
 * call the indicated callback. The callback is passed the relation
 * name, and the two members of the relation.
 *
 * It is assumed that the relex relationships are structured as follows:
 *
 *    "The outfielder caught the ball."
 *    <!-- _subj (<<catch>>, <<outfielder>>) -->
 *    EvaluationLink
 *       DefinedLinguisticRelationshipNode "_subj"
 *       ListLink
 *          WordInstanceNode "catch@23abcfde4"
 *          WordInstanceNode "outfielder@feee438"
 *
 * It is assumed that the passed handle indicates the first word
 * instance in the relationship.
 */
// The PrivateUseOnlyRelexRelationFinder class should be local scope to
// foreach_relex_relation() only, but C++ doesn't allow this. :-(
// This class is not for general, public use!
template <typename T>
class PrivateUseOnlyRelexRelationFinder
{
	private:
		Handle listlink;
		bool look_for_eval_link(const Handle& h)
		{
			Type t = h->get_type();
			if (t != EVALUATION_LINK) return false;

			// If we are here, lets see if the first node is a ling rel.
			const Handle& a(h->getOutgoingAtom(0));
			if (a->get_type() != DEFINED_LINGUISTIC_RELATIONSHIP_NODE) return false;

			// OK, we've found a relationship. Get the second member of
			// the list link, and call the user callback with it.
			const std::string &relname = a->get_name();

			const HandleSeq outset = listlink->getOutgoingSet();

			// First arg must be first (avoid reporting twice with swapped order).
			if (first_arg != outset[0]) return false;

			(user_data->*user_cb)(relname, outset[0], outset[1]);
			return false;
		}

	public:
		Handle first_arg;
		bool (T::*user_cb)(const std::string &, const Handle&, const Handle&);
		T *user_data;

		bool look_for_list_link(const Handle& h)
		{
			if (h->get_type() != LIST_LINK) return false;
			listlink = h;

			// If we are here, lets see if the list link is in eval link.
			h->foreach_incoming(&PrivateUseOnlyRelexRelationFinder::look_for_eval_link, this);
			return false;
		}
};

template <typename T>
inline bool
foreach_relex_relation(const Handle& h,
                       bool (T::*cb)(const std::string &, const Handle&, const Handle&), T *data)
{
	PrivateUseOnlyRelexRelationFinder<T> rrf;
	rrf.user_cb = cb;
	rrf.user_data = data;
	rrf.first_arg = h;
	return h->foreach_incoming(&PrivateUseOnlyRelexRelationFinder<T>::look_for_list_link, &rrf);
}

/**
 * Return the word-instance corresponding to a given word-sense-link
 *
 * Each word-sense-link is assumed to link a word instance to a
 * sense node:
 *
 *    InheritanceLink
 *      WordInstanceNode "bark@144"
 *      WordSenseNode "bark%1:20:00::"
 */
inline Handle get_word_instance_of_sense_link(const Handle& h)
{
	// The word instance is the first member (index 0) of the link.
	Handle word_instance = h->getOutgoingAtom(0);
	return word_instance;
}

/**
 * Return the sense node corresponding to a given word-sense-link
 *
 * Each word-sense-link is assumed to link a word instance to a
 * sense node:
 *
 *    InheritanceLink
 *      WordInstanceNode "bark@144"
 *      WordSenseNode "bark%1:20:00::"
 */
inline Handle get_word_sense_of_sense_link(const Handle& h)
{
	// The word sense is the second member (index 1) of the link.
	Handle word_sense = h->getOutgoingAtom(1);
	return word_sense;
}

} // namespace opencog

#endif // _OPENCOG_FOREACH_WORD_H
